Welcome!
This exploratory data analysis (EDA) uses the UK Road Safety: Traffic Accidents (2005-2017) dataset.
Reference: https://www.kaggle.com/tsiaras/uk-road-safety-accidents-and-vehicles
These files provide detailed road safety data about the circumstances of personal injury road accidents in GB from 1979, the types of vehicles involved and the consequential casualties. The statistics relate only to personal injury accidents on public roads that are reported to the police, and subsequently recorded, using the STATS19 accident reporting form.
## [1] "Accident_Index"
## [2] "X1st_Road_Class"
## [3] "X1st_Road_Number"
## [4] "X2nd_Road_Class"
## [5] "X2nd_Road_Number"
## [6] "Accident_Severity"
## [7] "Carriageway_Hazards"
## [8] "Date"
## [9] "Day_of_Week"
## [10] "Did_Police_Officer_Attend_Scene_of_Accident"
## [11] "Junction_Control"
## [12] "Junction_Detail"
## [13] "Latitude"
## [14] "Light_Conditions"
## [15] "Local_Authority_.District."
## [16] "Local_Authority_.Highway."
## [17] "Location_Easting_OSGR"
## [18] "Location_Northing_OSGR"
## [19] "Longitude"
## [20] "LSOA_of_Accident_Location"
## [21] "Number_of_Casualties"
## [22] "Number_of_Vehicles"
## [23] "Pedestrian_Crossing.Human_Control"
## [24] "Pedestrian_Crossing.Physical_Facilities"
## [25] "Police_Force"
## [26] "Road_Surface_Conditions"
## [27] "Road_Type"
## [28] "Special_Conditions_at_Site"
## [29] "Speed_limit"
## [30] "Time"
## [31] "Urban_or_Rural_Area"
## [32] "Weather_Conditions"
## [33] "Year"
## [34] "InScotland"
## 'data.frame': 2047256 obs. of 34 variables:
## $ Accident_Index : chr "200501BS00001" "200501BS00002" "200501BS00003" "200501BS00004" ...
## $ X1st_Road_Class : chr "A" "B" "C" "A" ...
## $ X1st_Road_Number : int 3218 450 0 3220 0 0 0 315 3212 450 ...
## $ X2nd_Road_Class : chr NA "C" NA NA ...
## $ X2nd_Road_Number : int 0 0 0 0 0 0 0 0 304 0 ...
## $ Accident_Severity : chr "Serious" "Slight" "Slight" "Slight" ...
## $ Carriageway_Hazards : chr "None" "None" "None" "None" ...
## $ Date : chr "2005-01-04" "2005-01-05" "2005-01-06" "2005-01-07" ...
## $ Day_of_Week : chr "Tuesday" "Wednesday" "Thursday" "Friday" ...
## $ Did_Police_Officer_Attend_Scene_of_Accident: int 1 1 1 1 1 1 1 1 1 1 ...
## $ Junction_Control : chr "Data missing or out of range" "Auto traffic signal" "Data missing or out of range" "Data missing or out of range" ...
## $ Junction_Detail : chr "Not at junction or within 20 metres" "Crossroads" "Not at junction or within 20 metres" "Not at junction or within 20 metres" ...
## $ Latitude : num 51.5 51.5 51.5 51.5 51.5 ...
## $ Light_Conditions : chr "Daylight" "Darkness - lights lit" "Darkness - lights lit" "Daylight" ...
## $ Local_Authority_.District. : chr "Kensington and Chelsea" "Kensington and Chelsea" "Kensington and Chelsea" "Kensington and Chelsea" ...
## $ Local_Authority_.Highway. : chr "Kensington and Chelsea" "Kensington and Chelsea" "Kensington and Chelsea" "Kensington and Chelsea" ...
## $ Location_Easting_OSGR : int 525680 524170 524520 526900 528060 524770 524220 525890 527350 524550 ...
## $ Location_Northing_OSGR : int 178240 181650 182240 177530 179040 181160 180830 179710 177650 180810 ...
## $ Longitude : num -0.191 -0.212 -0.206 -0.174 -0.157 ...
## $ LSOA_of_Accident_Location : chr "E01002849" "E01002909" "E01002857" "E01002840" ...
## $ Number_of_Casualties : int 1 1 1 1 1 1 1 2 2 5 ...
## $ Number_of_Vehicles : int 1 1 2 1 1 2 2 1 2 2 ...
## $ Pedestrian_Crossing.Human_Control : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Pedestrian_Crossing.Physical_Facilities : int 1 5 0 0 0 0 0 0 5 8 ...
## $ Police_Force : chr "Metropolitan Police" "Metropolitan Police" "Metropolitan Police" "Metropolitan Police" ...
## $ Road_Surface_Conditions : chr "Wet or damp" "Dry" "Dry" "Dry" ...
## $ Road_Type : chr "Single carriageway" "Dual carriageway" "Single carriageway" "Single carriageway" ...
## $ Special_Conditions_at_Site : chr "None" "None" "None" "None" ...
## $ Speed_limit : int 30 30 30 30 30 30 30 30 30 30 ...
## $ Time : chr "17:42" "17:36" "00:15" "10:35" ...
## $ Urban_or_Rural_Area : chr "Urban" "Urban" "Urban" "Urban" ...
## $ Weather_Conditions : chr "Raining no high winds" "Fine no high winds" "Fine no high winds" "Fine no high winds" ...
## $ Year : int 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 ...
## $ InScotland : chr "No" "No" "No" "No" ...
# Columns of `traffic` that are converted to factors, listed in the order
# in which they are converted.
factor_columns <- c(
  "X1st_Road_Class",
  "X1st_Road_Number",
  "X2nd_Road_Class",
  "X2nd_Road_Number",
  "Accident_Severity",
  "Carriageway_Hazards",
  "Day_of_Week",
  "Did_Police_Officer_Attend_Scene_of_Accident",
  "Junction_Control",
  "Junction_Detail",
  "Light_Conditions",
  "Local_Authority_.District.",
  "Local_Authority_.Highway.",
  "Location_Easting_OSGR",
  "Location_Northing_OSGR",
  "LSOA_of_Accident_Location",
  "Police_Force",
  "Road_Surface_Conditions",
  "Road_Type",
  "Special_Conditions_at_Site",
  "Speed_limit",
  "Urban_or_Rural_Area",
  "Weather_Conditions",
  "InScotland"
)

# Replace each listed column with its factor version in a single pass;
# columns not named above are left unchanged.
traffic[factor_columns] <- lapply(traffic[factor_columns], as.factor)
## Rows: 2,047,256
## Columns: 34
## $ Accident_Index <chr> "200501BS00001", "20050...
## $ X1st_Road_Class <fct> A, B, C, A, Unclassifie...
## $ X1st_Road_Number <fct> 3218, 450, 0, 3220, 0, ...
## $ X2nd_Road_Class <fct> NA, C, NA, NA, NA, NA, ...
## $ X2nd_Road_Number <fct> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Accident_Severity <fct> Serious, Slight, Slight...
## $ Carriageway_Hazards <fct> None, None, None, None,...
## $ Date <date> 2005-01-04, 2005-01-05...
## $ Day_of_Week <fct> Tuesday, Wednesday, Thu...
## $ Did_Police_Officer_Attend_Scene_of_Accident <fct> 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Junction_Control <fct> Data missing or out of ...
## $ Junction_Detail <fct> Not at junction or with...
## $ Latitude <dbl> 51.48910, 51.52007, 51....
## $ Light_Conditions <fct> Daylight, Darkness - li...
## $ Local_Authority_.District. <fct> Kensington and Chelsea,...
## $ Local_Authority_.Highway. <fct> Kensington and Chelsea,...
## $ Location_Easting_OSGR <fct> 525680, 524170, 524520,...
## $ Location_Northing_OSGR <fct> 178240, 181650, 182240,...
## $ Longitude <dbl> -0.191170, -0.211708, -...
## $ LSOA_of_Accident_Location <fct> E01002849, E01002909, E...
## $ Number_of_Casualties <int> 1, 1, 1, 1, 1, 1, 1, 2,...
## $ Number_of_Vehicles <int> 1, 1, 2, 1, 1, 2, 2, 1,...
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Pedestrian_Crossing.Physical_Facilities <int> 1, 5, 0, 0, 0, 0, 0, 0,...
## $ Police_Force <fct> Metropolitan Police, Me...
## $ Road_Surface_Conditions <fct> Wet or damp, Dry, Dry, ...
## $ Road_Type <fct> Single carriageway, Dua...
## $ Special_Conditions_at_Site <fct> None, None, None, None,...
## $ Speed_limit <fct> 30, 30, 30, 30, 30, 30,...
## $ Time <Period> 17H 42M 0S, 17H 36M ...
## $ Urban_or_Rural_Area <fct> Urban, Urban, Urban, Ur...
## $ Weather_Conditions <fct> Raining no high winds, ...
## $ Year <int> 2005, 2005, 2005, 2005,...
## $ InScotland <fct> No, No, No, No, No, No,...
Missing (NA) values per column
## Accident_Index
## 0
## X1st_Road_Class
## 0
## X1st_Road_Number
## 2
## X2nd_Road_Class
## 844272
## X2nd_Road_Number
## 17593
## Accident_Severity
## 0
## Carriageway_Hazards
## 0
## Date
## 0
## Day_of_Week
## 0
## Did_Police_Officer_Attend_Scene_of_Accident
## 278
## Junction_Control
## 0
## Junction_Detail
## 0
## Latitude
## 174
## Light_Conditions
## 0
## Local_Authority_.District.
## 0
## Local_Authority_.Highway.
## 0
## Location_Easting_OSGR
## 164
## Location_Northing_OSGR
## 164
## Longitude
## 175
## LSOA_of_Accident_Location
## 0
## Number_of_Casualties
## 0
## Number_of_Vehicles
## 0
## Pedestrian_Crossing.Human_Control
## 2920
## Pedestrian_Crossing.Physical_Facilities
## 3560
## Police_Force
## 0
## Road_Surface_Conditions
## 0
## Road_Type
## 0
## Special_Conditions_at_Site
## 0
## Speed_limit
## 37
## Time
## 156
## Urban_or_Rural_Area
## 0
## Weather_Conditions
## 0
## Year
## 0
## InScotland
## 53
## Rows: 2,042,570
## Columns: 28
## $ Accident_Index <chr> "200501BS00001", "20050...
## $ Accident_Severity <fct> Serious, Slight, Slight...
## $ Carriageway_Hazards <fct> None, None, None, None,...
## $ Date <date> 2005-01-04, 2005-01-05...
## $ Day_of_Week <fct> Tuesday, Wednesday, Thu...
## $ Did_Police_Officer_Attend_Scene_of_Accident <fct> 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Junction_Control <fct> Data missing or out of ...
## $ Junction_Detail <fct> Not at junction or with...
## $ Latitude <dbl> 51.48910, 51.52007, 51....
## $ Light_Conditions <fct> Daylight, Darkness - li...
## $ Local_Authority_.District. <fct> Kensington and Chelsea,...
## $ Local_Authority_.Highway. <fct> Kensington and Chelsea,...
## $ Longitude <dbl> -0.191170, -0.211708, -...
## $ LSOA_of_Accident_Location <fct> E01002849, E01002909, E...
## $ Number_of_Casualties <int> 1, 1, 1, 1, 1, 1, 1, 2,...
## $ Number_of_Vehicles <int> 1, 1, 2, 1, 1, 2, 2, 1,...
## $ Pedestrian_Crossing.Human_Control <int> 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Pedestrian_Crossing.Physical_Facilities <int> 1, 5, 0, 0, 0, 0, 0, 0,...
## $ Police_Force <fct> Metropolitan Police, Me...
## $ Road_Surface_Conditions <fct> Wet or damp, Dry, Dry, ...
## $ Road_Type <fct> Single carriageway, Dua...
## $ Special_Conditions_at_Site <fct> None, None, None, None,...
## $ Speed_limit <fct> 30, 30, 30, 30, 30, 30,...
## $ Time <Period> 17H 42M 0S, 17H 36M ...
## $ Urban_or_Rural_Area <fct> Urban, Urban, Urban, Ur...
## $ Weather_Conditions <fct> Raining no high winds, ...
## $ Year <int> 2005, 2005, 2005, 2005,...
## $ InScotland <fct> No, No, No, No, No, No,...
# Fatal-accident summaries ----
# Each pipeline below keeps only rows where Accident_Severity is "Fatal",
# counts the rows for every combination of the listed columns, sorts the
# counts in descending order and shows the five most frequent combinations.
# `count(..., sort = TRUE)` is used instead of `group_by() %>% count()` so
# that the result is returned ungrouped.

# Area type, lighting and junction layout; rows whose junction control is
# recorded as "Data missing or out of range" are excluded.
traffic %>%
  filter(
    Accident_Severity == "Fatal",
    Junction_Control != "Data missing or out of range"
  ) %>%
  count(
    Urban_or_Rural_Area, Light_Conditions, Junction_Control, Junction_Detail,
    sort = TRUE
  ) %>%
  head(5)

# Fatal accidents by the InScotland flag.
traffic %>%
  filter(Accident_Severity == "Fatal") %>%
  count(InScotland, sort = TRUE) %>%
  head(5)

# Fatal accidents by road type and road surface condition.
traffic %>%
  filter(Accident_Severity == "Fatal") %>%
  count(Accident_Severity, Road_Type, Road_Surface_Conditions, sort = TRUE) %>%
  head(5)

# Fatal accidents by weather, lighting and speed limit.
# The rows are filtered first: writing `Accident_Severity = "Fatal"` inside
# group_by() would overwrite the column with a constant and count accidents
# of every severity. Accident_Severity stays in the count so the result has
# the same columns as before.
traffic %>%
  filter(Accident_Severity == "Fatal") %>%
  count(
    Accident_Severity, Weather_Conditions, Light_Conditions, Speed_limit,
    sort = TRUE
  ) %>%
  head(5)

# Fatal accidents by day of the week.
traffic %>%
  filter(Accident_Severity == "Fatal") %>%
  count(Day_of_Week, sort = TRUE) %>%
  head(5)

# Fatal accidents by local authority district and police force.
traffic %>%
  filter(Accident_Severity == "Fatal") %>%
  count(Local_Authority_.District., Police_Force, sort = TRUE) %>%
  head(5)